In this R Markdown document we will display and document each step of CS329e R Project 5.

Project Requirements http://www.cs.utexas.edu/~cannata/dataVis/Projects/Tableau%20Project%202/R%20Project%20Requirements.html.

The dataset for this project pertains to airline on-time performance for airports throughout United States. To reduce the sheer volume of records, we choose to focus the analysis on data only from JFK. utilizing Tableau to created visualization of the relationships we uncovered.

Distance vs Average Delay

Avg(Delay) vs Age of Aircraft

Age of Aircraft vs Delay

Carrier vs Avg(Delay)

Crosstabs rank()

source("../01 SQL Crosstabs/Rank.R", echo = TRUE)
## 
## > require("RCurl")
## Loading required package: RCurl
## Loading required package: bitops
## 
## > require("jsonlite")
## Loading required package: jsonlite
## 
## Attaching package: 'jsonlite'
## 
## The following object is masked from 'package:utils':
## 
##     View
## 
## > require("dplyr")
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## > require("plyr")
## Loading required package: plyr
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## 
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## 
## > require("tidyr")
## Loading required package: tidyr
## 
## > rank <- data.frame(fromJSON(getURL(URLencode(gsub("\n", 
## +     " ", "129.152.144.84:5001/rest/native/?query=\"select uniquecarrier, dest, COUNT(dest .... [TRUNCATED] 
## 
## > tbl_df(rank)
## Source: local data frame [135 x 4]
## 
##    UNIQUECARRIER DEST TOTAL_RECORDS DEST_RANK
## 1             AA  LAX          3400         1
## 2             AA  MIA          1828         2
## 3             AA  SFO          1801         3
## 4             AA  SJU          1423         4
## 5             AA  DFW           741         5
## 6             AA  ORD           672         6
## 7             AA  SEA           366         7
## 8             AA  MCO           366         7
## 9             AA  LAS           366         7
## 10            AA  SAN           366         7
## ..           ...  ...           ...       ...

Crosstabs last_value()/max_value and difference

source("../01 SQL Crosstabs/lastval_dif.R", echo = TRUE)
## 
## > require("RCurl")
## 
## > require("jsonlite")
## 
## > require("plyr")
## 
## > require("dplyr")
## 
## > require("tidyr")
## 
## > lastval_dif <- data.frame(fromJSON(getURL(URLencode(gsub("\n", 
## +     " ", "129.152.144.84:5001/rest/native/?query=\n\"select distinct dest, uniquec .... [TRUNCATED] 
## 
## > tbl_df(lastval_dif)
## Source: local data frame [6,167 x 5]
## 
##    DEST UNIQUECARRIER AIRTIME MAX_AIRTIME ABERRATION
## 1   ACK            B6      37          76         39
## 2   ACK            B6      38          76         38
## 3   ACK            B6      40          76         36
## 4   ACK            B6      41          76         35
## 5   ACK            B6      42          76         34
## 6   ACK            B6      43          76         33
## 7   ACK            B6      44          76         32
## 8   ACK            B6      45          76         31
## 9   ACK            B6      46          76         30
## 10  ACK            B6      47          76         29
## ..  ...           ...     ...         ...        ...

Crosstabs nth_value

source("../01 SQL Crosstabs/Nthval.R", echo = TRUE)
## 
## > require("RCurl")
## 
## > require("jsonlite")
## 
## > require("plyr")
## 
## > require("dplyr")
## 
## > require("tidyr")
## 
## > nthval <- data.frame(fromJSON(getURL(URLencode(gsub("\n", 
## +     " ", "129.152.144.84:5001/rest/native/?query=\"\nselect l.month, l.dest, l.c_dest,  .... [TRUNCATED] 
## 
## > tbl_df(nthval)
## Source: local data frame [544 x 4]
## 
##    MONTH DEST C_DEST SECONDHIGHEST_CANCELLATION
## 1      1  ORD     22                         19
## 2      1  BOS     19                         19
## 3      1  SFO     18                         19
## 4      1  LAX     14                         19
## 5      1  CLT     11                         19
## 6      1  DCA     10                         19
## 7      1  BUF      9                         19
## 8      1  PIT      7                         19
## 9      1  BTV      7                         19
## 10     1  TPA      6                         19
## ..   ...  ...    ...                        ...

Crosstabs cume_dist

source("../01 SQL Crosstabs/cumedist.R", echo = TRUE)
## 
## > require("RCurl")
## 
## > require("jsonlite")
## 
## > require("plyr")
## 
## > require("dplyr")
## 
## > require("tidyr")
## 
## > cumedist <- data.frame(fromJSON(getURL(URLencode(gsub("\n", 
## +     " ", "129.152.144.84:5001/rest/native/?query=\"select  G.dest,  G.avg_airtime, cu .... [TRUNCATED] 
## 
## > tbl_df(cumedist)
## Source: local data frame [68 x 3]
## 
##    DEST AVG_AIRTIME PERCENTILE
## 1   HPN    19.00000 0.01470588
## 2   LGA    22.50000 0.02941176
## 3   BDL    24.93792 0.04411765
## 4   PVD    31.60531 0.05882353
## 5   ALB    32.73851 0.07352941
## 6   PHL    33.92096 0.08823530
## 7   BWI    39.21727 0.10294118
## 8   BOS    39.60775 0.11764706
## 9   SYR    44.53689 0.13235295
## 10  BTV    45.50635 0.14705883
## ..  ...         ...        ...